library(data.table)
library(tidyverse)
library(rtracklayer)
library(DESeq2)
library(Biostrings)
library(genomation)
library(RColorBrewer)
library(CLIPanalyze)
bamCoveragebamCoverage from Deeptools was used to generated scaled bw files for plotting peak heatmaps
Scalefactors are calculated as the reciprical of DESeq2 estimated sizeFactors using counnts in genes outside of peaks.
dir.create("PDF_figure/Peak_heatmap_merged", showWarnings = FALSE)
load('../Merged_Analysis/merged_peak_analysis.rda')
sizefactor <- sizeFactors(dds.peaks.hva.hvak)
scalefactor <- (1/sizefactor)/max((1/sizefactor))
scalefactor
## HVA1 HVA2 HVA3 HVA4 HVA5 HVA6 HVAK1 HVAK2
## 0.1080152 0.4181528 0.2468695 0.1090049 1.0000000 0.5320503 0.1065014 0.1526190
## HVAK3 HVAK4 HVAK5 HVAK6
## 0.5110165 0.1886995 0.1138450 0.1566501
bw file generation was done on cluster.
mirs.peaks <- readRDS("Datafiles/miRNA-merged-peaks-list-09282019.rds")
Prepare bw file for visualization
# combine bigwig files of the same genotype
$ cd ../Rescaled analysis/HVA
$ bigWigMerge *.bw HVA_merged.bedGraph
$ cd ../Rescaled analysis/HVAK
$ bigWigMerge *.bw HVAK_merged.bedGraph
convert chromosome name from Ensembl to UCSC format.
chromosomename <- read.table("../Rescaled analysis/Data/rescaled_bw_deseq_sizefactor/Ensembl.mm10.chrom.sizes.txt", sep = "\t")[,1][1:22]
hva_bed <- read.table("../Rescaled analysis/Data/rescaled_bw_deseq_sizefactor/HVA/HVA_merged.bedGraph", sep = "\t")
hva_bed <- hva_bed %>% filter(V1 %in% chromosomename)
hva_bed$V1 <- paste0("chr", hva_bed$V1)
hva_bed$V1[hva_bed$V1 == "chrMT"] <- "chrM"
write_delim(hva_bed, file = "../Rescaled analysis/Data/rescaled_bw_deseq_sizefactor/HVA_merged_edited.bedGraph", delim = "\t", col_names = F)
hvak_bed <- read.table("../Rescaled analysis/Data/rescaled_bw_deseq_sizefactor/HVAK/HVAK_merged.bedGraph", sep = "\t")
hvak_bed <- hvak_bed %>% filter(V1 %in% chromosomename)
hvak_bed$V1 <- paste0("chr", hvak_bed$V1)
hvak_bed$V1[hvak_bed$V1 == "chrMT"] <- "chrM"
write_delim(hvak_bed, file = "../Rescaled analysis/Data/rescaled_bw_deseq_sizefactor/HVAK_merged_edited.bedGraph", delim = "\t", col_names = F)
# bedGraph to bigWig conversion
$ awk 'NR!=1' HVA_merged_edited.bedGraph > input.HVA_merged.bedGraph
$ sort -k1,1 -k2,2n input.HVA_merged.bedGraph > sorted.input.HVA_merged.bedGraph
$ bedGraphToBigWig sorted.input.HVA_merged.bedGraph mm10.chrom.sizes HVA_merged.bw
$ awk 'NR!=1' HVAK_merged_edited.bedGraph > input.HVAK_merged.bedGraph
$ sort -k1,1 -k2,2n input.HVAK_merged.bedGraph > sorted.input.HVAK_merged.bedGraph
$ bedGraphToBigWig sorted.input.HVAK_merged.bedGraph mm10.chrom.sizes HVAK_merged.bw
peaks <- mirs.peaks
peaks <- lapply(peaks, FUN = function(x) {x[order(x$count, decreasing = T)]})
mybw.dir <- "../Rescaled analysis/Data/rescaled_bw_deseq_sizefactor"
mybw.files <- list.files(mybw.dir, pattern = "bw$", full.names = T)
print(mybw.files)
## [1] "../Rescaled analysis/Data/rescaled_bw_deseq_sizefactor/HVA_merged.bw"
## [2] "../Rescaled analysis/Data/rescaled_bw_deseq_sizefactor/HVAK_merged.bw"
cols <- brewer.pal(name = "Set2", n = 8)
reds <- brewer.pal(name = "Reds", n = 9)
blues <- brewer.pal(name = "Blues", n = 9)
mycolors <-c(blues[6], reds[6]) #HVA, HVAK
light.colors <- alpha(c(blues[3], reds[2]), 0.4)
histogram plot function
#plot histograms
peaks_meta <- function(mypeaks = peaks,
miRNA_family = "miR-451a",
dispersion = "se",
dispersion.col = NULL,
coordinates = c(-400, 400),
line.col = mycolors,
winsorize = c(0,99),
title = ""){
suppressWarnings(mypeaks <- lapply(mypeaks, FUN = function(x) {resize(x, width = sum(abs(coordinates)), fix="center")}))
mypeaks <- GRangesList(mypeaks)
mysml <- ScoreMatrixList(targets=mybw.files, window=mypeaks[[miRNA_family]], type = "bigWig", strand.aware = T)
mysampleInfo <- data.frame(basename(mybw.files), c("HVA", "HVAK"))
names(mysampleInfo) = c("sample", "genotype")
names(mysml) = mysampleInfo$genotype[match(names(mysml), mysampleInfo$sample)]
plotMeta(mysml, profile.names = c("HVA", "HVAK"), xcoords = coordinates, dispersion = dispersion, main =title, line.col = line.col, winsorize = winsorize, dispersion.col = dispersion.col)
}
len <- sapply(mirs.peaks, function(x) length(x))
mirs.peaks <- mirs.peaks[order(-len)]
mirna <- names(mirs.peaks)
for (i in 1:10) {
peaks_meta(mypeaks = peaks, dispersion = NULL, miRNA_family = mirna[i], title = mirna[i])
peaks_meta(mypeaks = peaks, dispersion = "se", miRNA_family = mirna[i], title = mirna[i],
dispersion.col = light.colors)
}
pdf("PDF_figure/Peak_heatmap_merged//Histogram_peak.pdf",
height = 4,
width = 6)
for (i in 1:10) {
peaks_meta(mypeaks = peaks, dispersion = NULL, miRNA_family = mirna[i], title = mirna[i])
peaks_meta(mypeaks = peaks, dispersion = "se", miRNA_family = mirna[i], title = mirna[i],
dispersion.col = light.colors)
}
dev.off()
## quartz_off_screen
## 2
plot heatmaps
peaks_heat <- function(mypeaks = peaks,
miRNA_family = "miR-451",
col = blues9,
coordinates = c(-400, 400),
order_rows = F,
winsorize_parameters = c(1,98)){
suppressWarnings(mypeaks <- lapply(mypeaks, FUN = function(x) {resize(x, width = sum(abs(coordinates)), fix="center")}))
mypeaks <- GRangesList(mypeaks)
mysml <- ScoreMatrixList(targets=mybw.files[c(1,2)], window=mypeaks[[miRNA_family]], type = "bigWig", strand.aware = T)
mysampleInfo <- data.frame(basename(mybw.files), c("HVA", "HVAK"))
names(mysampleInfo) = c("sample", "genotype")
names(mysml) = paste(mysampleInfo$genotype[match(names(mysml), mysampleInfo$sample)], miRNA_family , sep = "_")
mysml.scaled = scaleScoreMatrixList(mysml)
#multiHeatMatrix(mysml.scaled, xcoords = coordinates, col = col)
multiHeatMatrix(mysml, common.scale = T, xcoords = coordinates, winsorize = winsorize_parameters, col = col, order = order_rows)
}
colfunc <- colorRampPalette(c("white", "blue"))
mycols <- colfunc(128)
for (i in 1:10) {
peaks_heat(mypeaks = peaks, miRNA_family = mirna[i], col = blues9, winsorize_parameters = c(0,99))
}
pdf("PDF_figure/Peak_heatmap_merged//Heatmap_peak.pdf",
height = 8,
width = 8)
for (i in 1:10) {
peaks_heat(mypeaks = peaks, miRNA_family = mirna[i], col = blues9, winsorize_parameters = c(0,99))
}
dev.off()
## quartz_off_screen
## 2
sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Catalina 10.15.4
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] grid parallel stats4 stats graphics grDevices utils
## [8] datasets methods base
##
## other attached packages:
## [1] CLIPanalyze_0.0.10 GenomicAlignments_1.24.0
## [3] Rsamtools_2.4.0 GenomicFeatures_1.40.1
## [5] AnnotationDbi_1.50.3 plyr_1.8.6
## [7] RColorBrewer_1.1-2 genomation_1.20.0
## [9] Biostrings_2.56.0 XVector_0.28.0
## [11] DESeq2_1.28.1 SummarizedExperiment_1.18.2
## [13] DelayedArray_0.14.1 matrixStats_0.59.0
## [15] Biobase_2.48.0 rtracklayer_1.48.0
## [17] GenomicRanges_1.40.0 GenomeInfoDb_1.24.2
## [19] IRanges_2.22.2 S4Vectors_0.26.1
## [21] BiocGenerics_0.34.0 forcats_0.5.1
## [23] stringr_1.4.0 dplyr_1.0.6
## [25] purrr_0.3.4 readr_1.4.0
## [27] tidyr_1.1.3 tibble_3.1.2
## [29] ggplot2_3.3.3 tidyverse_1.3.1
## [31] data.table_1.14.0
##
## loaded via a namespace (and not attached):
## [1] colorspace_2.0-1 ellipsis_0.3.2 fs_1.5.0
## [4] rstudioapi_0.13 farver_2.1.0 bit64_4.0.5
## [7] fansi_0.5.0 lubridate_1.7.10 xml2_1.3.2
## [10] splines_4.0.3 cachem_1.0.5 impute_1.62.0
## [13] geneplotter_1.66.0 knitr_1.33 jsonlite_1.7.2
## [16] seqPattern_1.20.0 broom_0.7.7 gridBase_0.4-7
## [19] annotate_1.66.0 dbplyr_2.1.1 compiler_4.0.3
## [22] httr_1.4.2 biosignals_0.1.0 backports_1.2.1
## [25] assertthat_0.2.1 Matrix_1.3-4 fastmap_1.1.0
## [28] cli_2.5.0 prettyunits_1.1.1 htmltools_0.5.1.1
## [31] tools_4.0.3 gtable_0.3.0 glue_1.4.2
## [34] GenomeInfoDbData_1.2.3 reshape2_1.4.4 rappdirs_0.3.3
## [37] Rcpp_1.0.6 cellranger_1.1.0 jquerylib_0.1.4
## [40] vctrs_0.3.8 xfun_0.23 rvest_1.0.0
## [43] lifecycle_1.0.0 XML_3.99-0.6 zlibbioc_1.34.0
## [46] scales_1.1.1 BSgenome_1.56.0 hms_1.1.0
## [49] curl_4.3.1 yaml_2.2.1 memoise_2.0.0
## [52] sass_0.4.0 biomaRt_2.44.4 stringi_1.6.2
## [55] RSQLite_2.2.7 highr_0.9 genefilter_1.70.0
## [58] plotrix_3.8-1 BiocParallel_1.22.0 rlang_0.4.11
## [61] pkgconfig_2.0.3 bitops_1.0-7 evaluate_0.14
## [64] lattice_0.20-44 bit_4.0.4 tidyselect_1.1.1
## [67] magrittr_2.0.1 R6_2.5.0 generics_0.1.0
## [70] DBI_1.1.1 pillar_1.6.1 haven_2.4.1
## [73] withr_2.4.2 survival_3.2-11 RCurl_1.98-1.3
## [76] modelr_0.1.8 crayon_1.4.1 KernSmooth_2.23-20
## [79] utf8_1.2.1 BiocFileCache_1.12.1 rmarkdown_2.9
## [82] progress_1.2.2 locfit_1.5-9.4 readxl_1.3.1
## [85] blob_1.2.1 reprex_2.0.0 digest_0.6.27
## [88] xtable_1.8-4 openssl_1.4.4 munsell_0.5.0
## [91] bslib_0.2.5.1 askpass_1.1